Set Options

# Chunk defaults for knitting this codebook: keep the R code visible and let
# warnings, messages, and errors show up in the rendered output.
knitr::opts_chunk$set(
  echo = TRUE,    # show R code
  error = TRUE,   # do not interrupt codebook generation in case of errors,
                  # usually better for debugging
  message = TRUE, # show messages during codebook generation
  warning = TRUE  # show warnings during codebook generation
)
# Make theme_bw() the active ggplot2 theme.
ggplot2::theme_set(ggplot2::theme_bw())

library(rio)
library(labelled)

Prep Data

library(codebook)
## 
## Attaching package: 'codebook'
## The following object is masked from 'package:labelled':
## 
##     to_factor
# Read the paired priming-trial data that this codebook documents.
codebook_data <- import("../data_processing/output_data/priming_data/sr_prime_trials.csv")

# Helper for drafting the label list below from the column names:
# cat(paste(names(codebook_data), collapse = " = '', \n"))

# Attach a human-readable label to every column; these labels are what the
# generated codebook prints next to each variable.
var_label(codebook_data) <- list(
  word_combo = "The combination of the target word with the cue word - used to help combine information across trials.",
  unique_trial = "Unique trial number for each participant - was used to help make sure the cue-target lined up correctly since they were presented sequentially.",
  observation = "Unique participant ID number.",
  cue_word = "The cue word shown in the priming trial. ",
  cue_type = "The type of trial shown for the cue - should always be word for priming trials.",
  cue_correct = "If the cue was answered correctly.",
  trial_order = "What order the trials were shown in - used with the unique_trial to line up trials.",
  target_duration = "The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved.",
  target_word = "The target word shown to the participant.",
  target_type = "The type of trial shown for the target - should always be word for priming trials.",
  target_correct = "If the target was answered correctly.",
  # NOTE(review): this label previously ended mid-sentence ("The z-scored RT
  # for the "). It was completed from the variable name; confirm the grouping
  # used for standardization (e.g., per participant) and add it here.
  target_Z_RT = "The z-scored RT for the target trial.",
  keep_target = "If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered).",
  keep_participant = "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN.",
  keep_participant_answered = "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED.",
  cue_end_of_trial = "How the trial ended as response or timeout.",
  target_end_of_trial = "How the trial ended as response or timeout.",
  target_original_duration = "The duration in milliseconds of the entire trial from time shown to time end without our exclusions applied (i.e., all data).",
  type = "If the trial type was related or unrelated. "
)

# Dataset-level metadata. These fields appear in the codebook's "Metadata"
# section and in the JSON-LD block at the end of the rendered report.
# The description is a single multi-line string: the blank line inside it is
# part of the value and separates the two paragraphs.
metadata(codebook_data)$name <- "Semantic Priming Across Many Languages Priming Level Trials"
metadata(codebook_data)$description <- "This dataset includes the paired trial level data for the priming trials in the SPAML study. The data has been taken from long format where each trial is one row of data, to semi-long format, wherein each paired trial is one line of data. For example if a cue was CAT and the target was DOG, the full trial data includes each as a separate row. This dataset pairs them together to denote which combinations were related and unrelated, as well as which words were targets and cues. 

Semantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages."
# Persistent identifier, authorship, and citation for the data release.
metadata(codebook_data)$identifier <- "https://doi.org/10.5281/zenodo.10888833"
metadata(codebook_data)$creator <- "Erin M. Buchanan"
metadata(codebook_data)$citation <- "Buchanan, E., Cuccolo, K., Heyman, T., Iyer, A., Coles, N., Lewis Jr, N., Peters, K., van Berkel, N., Taylor, J., Van't Veer, A. E., Montefinese, M., Valentine, K. D., Maxwell, N., Türkan, B. N., Williams, G., Oliveros-Chacana, J. C., Röer, J., Fini, C., Acar, O., … Lewis, S. C. (2024). SemanticPriming/SPAML: SPAML v1 Data Release (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10888833"
metadata(codebook_data)$url <- "https://github.com/SemanticPriming/SPAML/releases/"
# Publication date plus the time span and location recorded for the data.
metadata(codebook_data)$datePublished <- "2024-05-01"
metadata(codebook_data)$temporalCoverage <- "2022-2024" 
metadata(codebook_data)$spatialCoverage <- "Online" 

Create codebook

# Generate the codebook for the labelled data set: dataset metadata,
# per-variable summaries, and the JSON-LD block shown below.
codebook(codebook_data)

Metadata

Description

Dataset name: Semantic Priming Across Many Languages Priming Level Trials

This dataset includes the paired trial level data for the priming trials in the SPAML study. The data has been taken from long format where each trial is one row of data, to semi-long format, wherein each paired trial is one line of data. For example if a cue was CAT and the target was DOG, the full trial data includes each as a separate row. This dataset pairs them together to denote which combinations were related and unrelated, as well as which words were targets and cues.

Semantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages.

Metadata for search engines
name value
1 Erin M. Buchanan
x
word_combo
unique_trial
observation
cue_word
cue_type
cue_correct
trial_order
target_duration
target_word
target_type
target_correct
target_Z_RT
keep_target
keep_participant
keep_participant_answered
cue_end_of_trial
target_end_of_trial
target_original_duration
type

# Variables

word_combo

The combination of the target word with the cue word - used to help combine information across trials.

Distribution

Distribution of values for word_combo

Distribution of values for word_combo

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
word_combo The combination of the target word with the cue word - used to help combine information across trials. character 0 1 2000 0 4 29 0

unique_trial

Unique trial number for each participant - was used to help make sure the cue-target lined up correctly since they were presented sequentially.

Distribution

Distribution of values for unique_trial

Distribution of values for unique_trial

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
unique_trial Unique trial number for each participant - was used to help make sure the cue-target lined up correctly since they were presented sequentially. character 0 1 95598 0 16 18 0

observation

Unique participant ID number.

Distribution

Distribution of values for observation

Distribution of values for observation

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
observation Unique participant ID number. character 0 1 681 0 14 14 0

cue_word

The cue word shown in the priming trial.

Distribution

Distribution of values for cue_word

Distribution of values for cue_word

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
cue_word The cue word shown in the priming trial. character 0 1 989 0 2 17 0

cue_type

The type of trial shown for the cue - should always be word for priming trials.

Distribution

Distribution of values for cue_type

Distribution of values for cue_type

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
cue_type The type of trial shown for the cue - should always be word for priming trials. character 0 1 1 0 4 4 0

cue_correct

If the cue was answered correctly.

Distribution

Distribution of values for cue_correct

Distribution of values for cue_correct

241 missing values.

Summary statistics

name label data_type n_missing complete_rate count mean
cue_correct If the cue was answered correctly. logical 241 0.997479 TRU: 91375, FAL: 3982 0.9582411

trial_order

What order the trials were shown in - used with the unique_trial to line up trials.

Distribution

Distribution of values for trial_order

Distribution of values for trial_order

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
trial_order What order the trials were shown in - used with the unique_trial to line up trials. numeric 0 1 1 194 400 196.3604 115.2478 ▇▇▇▇▇

target_duration

The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved.

Distribution

Distribution of values for target_duration

Distribution of values for target_duration

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
target_duration The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved. numeric 0 1 164 641 2974 725.0498 300.8918 ▇▃▁▁▁

target_word

The target word shown to the participant.

Distribution

Distribution of values for target_word

Distribution of values for target_word

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
target_word The target word shown to the participant. character 0 1 987 0 2 19 0

target_type

The type of trial shown for the target - should always be word for priming trials.

Distribution

Distribution of values for target_type

Distribution of values for target_type

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
target_type The type of trial shown for the target - should always be word for priming trials. character 0 1 1 0 4 4 0

target_correct

If the target was answered correctly.

Distribution

Distribution of values for target_correct

Distribution of values for target_correct

0 missing values.

Summary statistics

name label data_type n_missing complete_rate count mean
target_correct If the target was answered correctly. logical 0 1 TRU: 95598 1

target_Z_RT

The z-scored RT for the target trial.

Distribution

Distribution of values for target_Z_RT

Distribution of values for target_Z_RT

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
target_Z_RT The z-scored RT for the target trial. numeric 0 1 -2.8 -0.64 11 -0.4523956 0.7619565 ▇▂▁▁▁

keep_target

If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered).

Distribution

Distribution of values for keep_target

Distribution of values for keep_target

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
keep_target If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered). character 0 1 1 0 4 4 0

keep_participant

If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN.

Distribution

Distribution of values for keep_participant

Distribution of values for keep_participant

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
keep_participant If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN. character 0 1 1 0 4 4 0

keep_participant_answered

If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED.

Distribution

Distribution of values for keep_participant_answered

Distribution of values for keep_participant_answered

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
keep_participant_answered If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED. character 0 1 1 0 4 4 0

cue_end_of_trial

How the trial ended as response or timeout.

Distribution

Distribution of values for cue_end_of_trial

Distribution of values for cue_end_of_trial

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
cue_end_of_trial How the trial ended as response or timeout. character 0 1 2 0 7 8 0

target_end_of_trial

How the trial ended as response or timeout.

Distribution

Distribution of values for target_end_of_trial

Distribution of values for target_end_of_trial

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
target_end_of_trial How the trial ended as response or timeout. character 0 1 1 0 8 8 0

target_original_duration

The duration in milliseconds of the entire trial from time shown to time end without our exclusions applied (i.e., all data).

Distribution

Distribution of values for target_original_duration

Distribution of values for target_original_duration

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
target_original_duration The duration in milliseconds of the entire trial from time shown to time end without our exclusions applied (i.e., all data). numeric 0 1 164 641 2974 725.0498 300.8918 ▇▃▁▁▁

type

If the trial type was related or unrelated.

Distribution

Distribution of values for type

Distribution of values for type

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
type If the trial type was related or unrelated. character 0 1 2 0 7 9 0

Missingness report

Codebook table

JSON-LD metadata

The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.

{
  "name": "Semantic Priming Across Many Languages Priming Level Trials",
  "description": "This dataset includes the paired trial level data for the priming trials in the SPAML study. The data has been taken from long format where each trial is one row of data, to semi-long format, wherein each paired trial is one line of data. For example if a cue was CAT and the target was DOG, the full trial data includes each as a separate row. This dataset pairs them together to denote which combinations were related and unrelated, as well as which words were targets and cues. \n\nSemantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages.\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n[truncated]\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.9.2).",
  "identifier": "https://doi.org/10.5281/zenodo.10888833",
  "creator": "Erin M. Buchanan",
  "citation": "Buchanan, E., Cuccolo, K., Heyman, T., Iyer, A., Coles, N., Lewis Jr, N., Peters, K., van Berkel, N., Taylor, J., Van't Veer, A. E., Montefinese, M., Valentine, K. D., Maxwell, N., Türkan, B. N., Williams, G., Oliveros-Chacana, J. C., Röer, J., Fini, C., Acar, O., … Lewis, S. C. (2024). SemanticPriming/SPAML: SPAML v1 Data Release (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10888833",
  "url": "https://github.com/SemanticPriming/SPAML/releases/",
  "datePublished": "2024-05-01",
  "temporalCoverage": "2022-2024",
  "spatialCoverage": "Online",
  "keywords": ["word_combo", "unique_trial", "observation", "cue_word", "cue_type", "cue_correct", "trial_order", "target_duration", "target_word", "target_type", "target_correct", "target_Z_RT", "keep_target", "keep_participant", "keep_participant_answered", "cue_end_of_trial", "target_end_of_trial", "target_original_duration", "type"],
  "@context": "http://schema.org/",
  "@type": "Dataset",
  "variableMeasured": [
    {
      "name": "word_combo",
      "description": "The combination of the target word with the cue word - used to help combine information across trials.",
      "@type": "propertyValue"
    },
    {
      "name": "unique_trial",
      "description": "Unique trial number for each participant - was used to help make sure the cue-target lined up correctly since they were presented sequentially.",
      "@type": "propertyValue"
    },
    {
      "name": "observation",
      "description": "Unique participant ID number.",
      "@type": "propertyValue"
    },
    {
      "name": "cue_word",
      "description": "The cue word shown in the priming trial. ",
      "@type": "propertyValue"
    },
    {
      "name": "cue_type",
      "description": "The type of trial shown for the cue - should always be word for priming trials.",
      "@type": "propertyValue"
    },
    {
      "name": "cue_correct",
      "description": "If the cue was answered correctly.",
      "@type": "propertyValue"
    },
    {
      "name": "trial_order",
      "description": "What order the trials were shown in - used with the unique_trial to line up trials.",
      "@type": "propertyValue"
    },
    {
      "name": "target_duration",
      "description": "The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved.",
      "@type": "propertyValue"
    },
    {
      "name": "target_word",
      "description": "The target word shown to the participant.",
      "@type": "propertyValue"
    },
    {
      "name": "target_type",
      "description": "The type of trial shown for the target - should always be word for priming trials.",
      "@type": "propertyValue"
    },
    {
      "name": "target_correct",
      "description": "If the target was answered correctly.",
      "@type": "propertyValue"
    },
    {
      "name": "target_Z_RT",
      "description": "The z-scored RT for the ",
      "@type": "propertyValue"
    },
    {
      "name": "keep_target",
      "description": "If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered).",
      "@type": "propertyValue"
    },
    {
      "name": "keep_participant",
      "description": "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN.",
      "@type": "propertyValue"
    },
    {
      "name": "keep_participant_answered",
      "description": "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED.",
      "@type": "propertyValue"
    },
    {
      "name": "cue_end_of_trial",
      "description": "How the trial ended as response or timeout.",
      "@type": "propertyValue"
    },
    {
      "name": "target_end_of_trial",
      "description": "How the trial ended as response or timeout.",
      "@type": "propertyValue"
    },
    {
      "name": "target_original_duration",
      "description": "The duration in milliseconds of the entire trial from time shown to time end without our exclusions applied (i.e., all data).",
      "@type": "propertyValue"
    },
    {
      "name": "type",
      "description": "If the trial type was related or unrelated. ",
      "@type": "propertyValue"
    }
  ]
}